
  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      Stata 18.0
___/   /   /___/   /   /___/       MP—Parallel Edition

 Statistics and Data Science       Copyright 1985-2023 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-782-8272        https://www.stata.com
                                   979-696-4600        service@stata.com

Stata license: 745-user 2-core network, expiring 30 Jun 2025
Serial number: 501809309429
  Licensed to: Noah Sobel-Lewin
               University of Chicago

Notes:
      1. Stata is running in batch mode.
      2. Unicode is supported; see help unicode_advice.
      3. More than 2 billion observations are allowed; see help obs_advice.
      4. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.

. do 01Code/model/cleaning/00MainStata.do 

. /************************************************************************
> Purpose: Master do-file for constructing data for structural model
>    *************************************************************************/
. 
. clear all

. clear matrix

. set more off

. set linesize 255

. pause on

. 
. global CODE_DIR = "01Code/model"

. 
. global MODEL_DATA_OUT = "02DataPipeline/model"

. 
. global MISC_DATA_IN = "00RawData/misc"

. global PHONE_DATA_IN = "00RawData/phone_survey"

. global PHONE_DATA_OUT = "02DataPipeline/phone_survey"

. global BASELINE_DATA_IN = "00RawData/baseline"

. global BASELINE_DATA_OUT = "02DataPipeline/baseline"

. global EMISSIONS_DATA_OUT = "02DataPipeline/emissions"

. global TRADING_DATA_IN = "00RawData/trading"

. global TRADING_DATA_OUT = "02DataPipeline/trading/intermediate"

. global TRADING_DATA_CLEAN = "02DataPipeline/trading/cleaned"

. 
. ** We use a conversion rate of USD 1 to INR 70, as of 2 Jan 2019.
. ** Source: The Fed (https://www.federalreserve.gov/releases/h10/20190107/)
. global USD2INR=70

. 
. ** We use a conversion rate of USD 1 to INR 70, as of 2 Jan 2019.
. ** Source: The Fed (https://www.federalreserve.gov/releases/h10/20190107/)
. global USD2INR=70

. 
. **********************************************************************
. 
. do "$CODE_DIR/cleaning/prepare_structural_model_input.do"

. /************************************************************************
> Purpose: Preparing data sets for ETS structural model   
> *************************************************************************/
. 
. clear all

. clear matrix

. set more off

. set linesize 255

. pause on

. 
. use "$PHONE_DATA_OUT/apcd_panel_plant.dta", clear
(337 STACKS. MASTER T&C BALANCED PANEL. HIST CALIB FACTORS.)

. 
. ** Merge CEMS emissions data used for emissions analysis
. merge 1:m gpcb_id using "$EMISSIONS_DATA_OUT/pm_mass_plant-period.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                            26
        from master                        26  (_merge==1)
        from using                          0  (_merge==2)

    Matched                             2,920  (_merge==3)
    -----------------------------------------

. * 292 plants x 10 periods (included in analysis) + 26 plants (not in analysis)
. * = 2,946 observations
. drop _merge treatmentstatus

. order period_num, after(gpcb_name)

. sort gpcb_id period_num

. 
. ** Merge periods data
. rename period_num period

. merge m:1 period using "$TRADING_DATA_OUT/index_period.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                            26
        from master                        26  (_merge==1)
        from using                          0  (_merge==2)

    Matched                             2,920  (_merge==3)
    -----------------------------------------

. drop _merge

. order period, after(apcd_unitcost_ope_esp)

. 
. ** Prorate emissions based on compliance period length
. local rule_num 0 A B

. foreach r of local rule_num{
  2.         gen pm_mass_val_`r'_prorated = pm_mass_val_`r' / compliance_length * 30
  3.         label var pm_mass_val_`r'_prorated "Plant-period Validated Mass Emissions Prorated (kg / month) - Rule `r'"
  4. } 
(323 missing values generated)
(26 missing values generated)
(26 missing values generated)

. drop pm_mass_val_0 pm_mass_val_A pm_mass_val_B

. 
. ** Merge with potential maximum emissions
. merge m:1 gpcb_id using "$EMISSIONS_DATA_OUT/potential_max_emissions.dta", ///
>         keepusing(AverageUncontrolledMass12 AverageUncontrolledMass16)

    Result                      Number of obs
    -----------------------------------------
    Not matched                            26
        from master                        26  (_merge==1)
        from using                          0  (_merge==2)

    Matched                             2,920  (_merge==3)
    -----------------------------------------

. drop _merge

. rename AverageUncontrolledMass12 pm_mass_potential_max_12

. rename AverageUncontrolledMass16 pm_mass_potential_max_16

. * TODO: Check with Prajval whether these are already prorated (the labels below assume kg / month).
. label var pm_mass_potential_max_12 "Plant-period Potential Maximum Mass Emissions (kg / month) - 12 hours"

. label var pm_mass_potential_max_16 "Plant-period Potential Maximum Mass Emissions (kg / month) - 16 hours"

. 
. ** Merge with trading data 
. rename period period_num

. merge 1:1 gpcb_id period_num using "$TRADING_DATA_CLEAN/panel_plant-period.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                         1,386
        from master                     1,386  (_merge==1)
        from using                          0  (_merge==2)

    Matched                             1,560  (_merge==3)
    -----------------------------------------

. rename _merge D_trade

. label define D_trade 0 "Not in trade data" 1 "Present in trade data"

. replace D_trade = 0 if D_trade==1
(1,386 real changes made)

. replace D_trade = 1 if D_trade==3
(1,560 real changes made)

. label values D_trade D_trade

. label var D_trade "=1 if present in trade data"

. order D_trade, after(D_analysis)

. drop period_end compliance_end period_cap_prorated compliance_cap

. rename compliance_month_cap period_cap_prorated

. 
. ** Add apcd_max variable
. gen apcd_max = 0

. replace apcd_max = 1 if (apcd_present_cyclone == 1) & (apcd_present_bagfilter == 0) & (apcd_present_scrubber == 0) & (apcd_present_esp == 0)
(41 real changes made)

. replace apcd_max = 2 if (apcd_present_bagfilter == 1) & (apcd_present_scrubber == 0) & (apcd_present_esp == 0)
(786 real changes made)

. replace apcd_max = 3 if (apcd_present_scrubber == 1) & (apcd_present_esp == 0)
(1,760 real changes made)

. replace apcd_max = 4 if (apcd_present_esp == 1)
(282 real changes made)

. 
. ** Data notes for Nick, and to-do items.
. * APCD maintenance and operating costs = 6% and 3%, respectively, of installation cost; these values may be updated.
. * EDC price = fixed at Rs. 200 based on the manual; we do not have the actual price.
. * EDC amount = estimated, not actual; also prorated.
. * permit_trade_prorated = inv + cons - alloc // in theory this should equal purchased - sold, but it does not.
. * We used Rs. because it makes more sense for permits right now.
. 
. ** Create a new index for plants from 1 to 318
. preserve

. sort gpcb_id

. by gpcb_id: keep if _n==1
(2,628 observations deleted)

. sort gpcb_id

. gen id_plant = _n

. rename gpcb_id id_gpcb

. keep id_gpcb id_plant

. duplicates drop

Duplicates in terms of all variables

(0 observations are duplicates)

. tempfile gpcb_ids

. save `gpcb_ids'
file /var/folders/7r/1zsrb09s0r578bq3nysvhzd40000gn/T//St40526.000002 saved as .dta format

. restore

. rename gpcb_id id_gpcb

. merge m:1 id_gpcb using `gpcb_ids'

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                             2,946  (_merge==3)
    -----------------------------------------

. label var id_plant "Unique Plant ID (1-318)"

. drop _merge

. order id_plant id_gpcb

. 
. ** Export for MATLAB
. * 2,946 observations, 58 variables
. gsort -D_treatment -D_analysis id_gpcb period_num

. save "$MODEL_DATA_OUT/structural_model_input.dta", replace
(file 02DataPipeline/model/structural_model_input.dta not found)
file 02DataPipeline/model/structural_model_input.dta saved

. export delimited using "$MODEL_DATA_OUT/structural_model_input.txt", nolabel delimiter(tab) replace
(file 02DataPipeline/model/structural_model_input.txt not found)
file 02DataPipeline/model/structural_model_input.txt saved

. 
end of do-file

. do "$CODE_DIR/cleaning/prepare_bids_data.do"

. /************************************************************************
> Purpose: Preparing bids data set (simulated bids counterpart)
> *************************************************************************/
. 
. clear all

. clear matrix

. set more off

. set linesize 255

. pause on

. 
. ** Get plant-period level variables from structural_model_input.dta
. use "$MODEL_DATA_OUT/structural_model_input.dta", clear
(337 STACKS. MASTER T&C BALANCED PANEL. HIST CALIB FACTORS.)

. keep if D_trade == 1
(1,386 observations deleted)

. 
. 
. * Rename variables
. rename heatoutput Hi

. rename emission_val_prorated Eit

. rename pm_mass_potential_max_16 Ebari

. rename permit_alloc_prorated Ait

. 
. * Generate the share of allocated quantity of a plant in a period
. rename period_num id_period

. bysort id_period: egen total_allocation = sum(Ait)

. gen Ai_share = Ait / total_allocation

. label var Ai_share "allocated quantity / total allocation"

. 
. keep id_plant id_gpcb id_period Hi Ait Ai_share Eit Ebari apcd_max  

. order id_plant id_gpcb id_period Hi Ait Ai_share Eit Ebari apcd_max

. duplicates drop

Duplicates in terms of all variables

(0 observations are duplicates)

. sort id_plant id_period

. tempfile plant_period_variables

. save `plant_period_variables'
file /var/folders/7r/1zsrb09s0r578bq3nysvhzd40000gn/T//St40526.000001 saved as .dta format

. 
. 
. ** Get plant-period-bid level variables from panel_plant-period-date-bid.dta
. use "$TRADING_DATA_CLEAN/panel_plant-period-date-bid.dta", clear

. 
. * Rename variables
. rename gpcb_id id_gpcb

. rename commodity id_commodity

. rename period id_period

. rename bid_price bid_p

. rename bid_qty bid_q

. rename permit_holding_hyp bid_qE

. rename week_num bid_week

. rename max_week_num num_weeks_period

. rename is_auction D_auction

. rename consumed_period period_emissions

. 
. * Generate a new Bid ID
. bysort id_gpcb id_period (bid_id): gen id_bid = _n

. replace id_bid = . if missing(bid_q) & missing(trade_qty)
(3 real changes made, 3 to missing)

. label var id_bid "Bid ID"

. drop bid_id

. 
. * Add an indicator of whether a bid is placed in the first half of a period.
. gen D_first_half = 0

. replace D_first_half = 1 if bid_day_norm <= 0.5
(3,120 real changes made)

. replace D_first_half = . if bid_day_norm == .
(3 real changes made, 3 to missing)

. label var D_first_half "=1 if a bid is placed in the first half of a period"

. 
. * Count the number of bids a plant placed in a period
. bysort id_gpcb id_period: gen bid_count = _N

. label var bid_count "Number of bids placed in the period"

. 
. keep id_gpcb id_period id_bid bid_count bid_qE bid_q bid_p bid_day_norm bid_week num_weeks_period D_first_half D_auction period_emissions D_cyc-D_esp

. 
. ** Merge data sets
. merge m:1 id_gpcb id_period using `plant_period_variables'

    Result                      Number of obs
    -----------------------------------------
    Not matched                           192
        from master                         3  (_merge==1)
        from using                        189  (_merge==2)

    Matched                             8,433  (_merge==3)
    -----------------------------------------

. order id_plant id_gpcb id_period id_bid bid_count bid_qE bid_q bid_p bid_day_norm bid_week num_weeks_period D_first_half period_emissions

. drop if _merge == 1
(3 observations deleted)

. drop _merge

. sort id_gpcb id_period id_bid

. replace bid_count = 0 if missing(id_bid)
(189 real changes made)

. 
. save "$MODEL_DATA_OUT/bids.dta", replace
(file 02DataPipeline/model/bids.dta not found)
file 02DataPipeline/model/bids.dta saved

. export delimited using "$MODEL_DATA_OUT/bids.txt", nolabel delimiter(tab) replace
(file 02DataPipeline/model/bids.txt not found)
file 02DataPipeline/model/bids.txt saved

. 
end of do-file

. do "$CODE_DIR/cleaning/compute_bid_price_statistics.do"

. /************************************************************************
> Purpose:        Generate a data set of the statistics of period prices
> *************************************************************************/
. 
. set more off

. clear all

. pause on

. 
. use "$TRADING_DATA_CLEAN/panel_plant-period-date-bid.dta", replace

. 
. *** Currently we compute the unweighted mean and median of trade (clearing) prices and of bid prices over all bids in a period.
. *** Open question: would averages weighted by bid quantity make more sense?
. 
. bysort period: egen price_clearing_mean = mean(trade_price)
(3 missing values generated)

. bysort period: egen price_clearing_median = median(trade_price)
(3 missing values generated)

. bysort period: egen price_bid_mean = mean(bid_price)
(3 missing values generated)

. bysort period: egen price_bid_median = median(bid_price)
(3 missing values generated)

. keep period price_clearing_mean price_clearing_median price_bid_mean price_bid_median compliance_month_cap

. rename compliance_month_cap period_cap

. rename period id_period

. duplicates drop

Duplicates in terms of all variables

(8,425 observations deleted)

. drop if id_period == .
(1 observation deleted)

. save "$MODEL_DATA_OUT/period_statistics.dta", replace
(file 02DataPipeline/model/period_statistics.dta not found)
file 02DataPipeline/model/period_statistics.dta saved

. export delimited using "$MODEL_DATA_OUT/period_statistics.txt", nolabel delimiter(tab) replace
(file 02DataPipeline/model/period_statistics.txt not found)
file 02DataPipeline/model/period_statistics.txt saved

. 
end of do-file

. 
end of do-file
